#!/usr/bin/env python3
from sklearn.cluster import KMeans
from sklearn.externals import joblib
import numpy
import matplotlib.pyplot as plt
import csv
import pandas as pd

'''
    Cluster university data by coordinates and paper count.
'''
def main(csv_path="lat_lng_count_papers.csv"):
    """Cluster universities by coordinates and paper count, then plot them.

    Reads the ``lat``, ``lng`` and ``count_papers`` columns from a CSV file,
    splits the rows into two groups with k-means, and shows a scatter plot
    of ``lat`` (x axis) against ``lng`` (y axis) with one colour per group.

    Args:
        csv_path: Path of the CSV file to load. The default is the file
            name this script has always read, so ``main()`` still works.

    Raises:
        KeyError: If one of the three required columns is missing.
        ValueError: If the file holds fewer rows than the number of
            clusters, which k-means cannot fit.
    """
    n_clusters = 2
    feature_columns = ["lat", "lng", "count_papers"]

    data = pd.read_csv(csv_path)

    # Select the feature matrix in one step instead of slicing the frame
    # row by row; this also avoids relying on a default integer index.
    features = data[feature_columns].values
    if len(features) < n_clusters:
        raise ValueError(
            "need at least %d rows to form %d clusters, got %d"
            % (n_clusters, n_clusters, len(features))
        )

    # Train the model.
    # NOTE(review): the features are not rescaled, so count_papers may
    # dominate the distance over lat/lng -- confirm this is intended.
    model = KMeans(n_clusters=n_clusters)
    labels = model.fit(features).labels_

    # Draw real markers: plotting single points with plt.plot and no marker
    # style leaves the figure visually empty. Cluster 1 is red, the other
    # cluster is green, as in the original colour choice.
    in_cluster_one = labels == 1
    for mask, color in ((in_cluster_one, 'r'), (~in_cluster_one, 'g')):
        members = features[mask]
        plt.scatter(members[:, 0], members[:, 1], c=color)

    plt.xlabel("lat")
    plt.ylabel("lng")
    plt.show()






































# Run the clustering demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
